import numpy as np
from transformers import PreTrainedTokenizer

from zkl_llmpt_iterator.tokenizer.tokenizer import TextTokenizer


class HuggingFaceTokenizer(TextTokenizer):
    """Adapter that wraps a Hugging Face ``PreTrainedTokenizer`` as a ``TextTokenizer``.

    Every operation is delegated to the wrapped tokenizer; this class only
    converts the encoded ids into a NumPy array.
    """

    def __init__(self, base: PreTrainedTokenizer):
        """Store the tokenizer that all calls are forwarded to.

        Args:
            base: The wrapped tokenizer instance.
        """
        self._base = base

    @property
    def base(self) -> PreTrainedTokenizer:
        """The wrapped tokenizer, exactly as passed to the constructor."""
        wrapped = self._base
        return wrapped

    @property
    def vocab_tokens_n(self) -> int:
        """Vocabulary size, taken as ``len()`` of the wrapped tokenizer."""
        # NOTE(review): presumably this count includes tokens added on top of
        # the base vocabulary -- confirm against the transformers docs.
        size = len(self._base)
        return size

    def encode(self, text: str) -> np.ndarray:
        """Encode ``text`` with the wrapped tokenizer.

        Args:
            text: The string to tokenize.

        Returns:
            The ids produced by the wrapped tokenizer's ``encode`` (called
            with its default options), as an ``int64`` array.
        """
        token_ids = self._base.encode(text)
        return np.asarray(token_ids, dtype=np.int64)

    def decode(self, tokens: np.ndarray) -> str:
        """Decode ``tokens`` back into text with the wrapped tokenizer.

        Args:
            tokens: Token ids, passed unchanged to the wrapped tokenizer.

        Returns:
            Whatever the wrapped tokenizer's ``decode`` returns for ``tokens``.
        """
        text = self._base.decode(tokens)
        return text
